################################################################################
## Group Identities and Parliamentary Debates: Replication package
## Fiva, Nedregård and Øien (2025)

# Description:

## Code to make Table C1: "Classification of policy areas based on reported 
## voter preferences: 1981-2017"

################################################################################

# Packages

library(data.table)
library(lubridate)
library(xtable)
library(dplyr)
library(stringr)




# Directories (wd is set by master.R)
data.dir <- "../data/1_raw_data"
tab.dir  <- "../results/tables"
in_text  <- "../results/in_text"


# Data

## "SurveyPanel.dta" is confidential; README.txt explains how to obtain
## access to it.

d <- haven::read_dta(file.path(data.dir, "SurveyPanel.dta"), encoding = "UTF8")
setDT(d)  # turn the imported data frame into a data.table by reference

# Data cleaning

# Garbled spellings (a "!" where a Norwegian letter belongs) and the spelling
# each one is replaced with.
replacements <- list(
  "milj!" = "miljø",
  "n!ring" = "næring",
  "!konomi" = "økonomi",
  "sammensl!ing" = "sammenslåing",
  "milit!r" = "militær"
)

# Free-text answer columns that get repaired
cols <- c("important1", "important2")

# For each column, run every replacement in list order. The names of
# `replacements` are used as gsub() patterns.
for (col in cols) {
  repaired <- Reduce(
    function(text, garbled) gsub(garbled, replacements[[garbled]], text),
    names(replacements),
    d[[col]]
  )
  set(d, i = NULL, j = col, value = repaired)
}

# Non-issue respondents
#
# A respondent gets non.issue = 1 when important1 contains "ingen" as a
# standalone word, when important1 and important2 are both empty, or when
# important1 is one of the non-responses collected below; otherwise 0.
# Authors' note: 1291 respondents have "ingen" as a standalone word, and 97% of
# them respond "no issues particularly important"
# ("ingen saker spesielt viktige").
#
# NOTE(review): these comparisons run on the answers as they are at this point;
# important1/important2 are only lowercased further down in the script, so a
# mixed-case answer would not match the lowercase strings here. Confirm the raw
# answers are already lowercase.

## Distinct answers that contain "ingen" as a standalone word
non.issues <- unique(grep("\\bingen\\b", d$important1, value = TRUE))

## Respondents who answer "vet ikke /kan ikke si" ("don't know / can't say"),
## refuse to answer, or have not voted are also treated as non-issue voters
non.responses <- c(
  "ikke stemt",
  "(8) nekter/vil ikke svare",
  "stemte ikke",
  "vet ikke/kan ikke si",
  "stemte ikke ved valget",
  "vet ikke",
  "vet ikke /kan ikke si",
  "vet ikke, kan ikke si",
  "(9) vet ikke",
  "stemte  ikke",
  "nei, egentlig ikke",
  "irrelevant, ikke stemt.",
  "io stemte ikke",
  "ikke interessert",
  "ikke",
  "husker ikke noe spesielt",
  "husker ikke ",
  "har ikke stemt",
  "n/a"
)

## Answers consisting of digits only are non-issue as well
d.issues <- unique(grep("^\\d+$", d$important1, value = TRUE))

## Answers too short to make any sense are also set to non-issue
too.short.response <- c(",", "?", ".a", "o", "u")

# %in% never returns NA, so a missing answer simply counts as "no match"
d[, non.issue := as.numeric(
  important1 %in% c(non.issues, d.issues, non.responses, too.short.response) |
    (important1 %in% "" & important2 %in% "")
)]

# Keywords searched for in the free-text answers
words_vector <- c(
  "abort", "samferdsel", "kollektiv", "jernbane", "vei",
  "veg", "skatt", "avgift", "bompenger", "økonomi", "utdanning",
  "skole", "skule", "student", "barn", "eldre", "familie",
  "helse", "sykehus", "sjukehus", "miljø", "innvandring",
  "innvandrere", "asyl", "velferd", "trygd", "arbeid",
  "sysselsetting", "bedrift", "handel", "næring", "landbruk",
  "jordbruk", "fiskeri", "utjevning", "fordeling", "pensjon",
  "desentralisering", "distrikt", "norsk", "norge", "militær",
  "forsvar"
)

# Lowercase both answer columns so the case-sensitive matching below works on
# lowercase text
for (answer_col in c("important1", "important2")) {
  set(d, j = answer_col, value = tolower(d[[answer_col]]))
}

# One indicator column per keyword, named "m<keyword>": 1 when the keyword
# occurs anywhere in important1 or important2, 0 when it occurs in neither.
# The match is an unanchored, case-sensitive pattern match, so a keyword also
# hits inside a longer word. A missing answer can leave the indicator NA.
for (word in words_vector) {
  keyword <- regex(word, ignore_case = FALSE)
  in_first <- str_detect(d[["important1"]], keyword)
  in_second <- str_detect(d[["important2"]], keyword)
  set(d, j = paste0("m", word), value = as.integer(in_first | in_second))
}






# Policy-area indicators. Each policy area is built from one or more keyword
# dummies (the "m<keyword>" columns): the area is 1 when at least one of its
# dummies equals 1 and 0 otherwise. A missing dummy counts as "not mentioned".
topic_dummies <- list(
  "Norway"          = c("mnorsk", "mnorge"),
  "regional policy" = c("mdesentralisering", "mdistrikt"),
  "redistribution"  = c("mutjevning", "mfordeling"),
  "agriculture"     = c("mlandbruk", "mjordbruk", "mfiskeri"),
  "defense"         = c("mmilitær", "mforsvar"),
  "taxation"        = c("mskatt", "mavgift", "mbompenger"),
  "education"       = c("mutdanning", "mskole", "mskule", "mstudent"),
  "health care"     = c("mhelse", "msykehus", "msjukehus"),
  "family"          = "mfamilie",
  "environment"     = "mmiljø",
  "children"        = "mbarn",
  "pension"         = "mpensjon",
  "immigration"     = c("minnvandring", "minnvandrere", "masyl"),
  "economy"         = "møkonomi",
  "transport"       = c("msamferdsel", "mkollektiv", "mjernbane", "mvei",
                        "mveg"),
  "welfare"         = c("mvelferd", "mtrygd"),
  "employment"      = c("marbeid", "msysselsetting"),
  "abortion"        = "mabort",
  "elder care"      = "meldre",
  "business"        = c("mbedrift", "mhandel", "mnæring")
)

for (topic in names(topic_dummies)) {
  # %in% 1L is TRUE only for an actual 1, so NA dummies never flag the area
  dummy_hits <- lapply(topic_dummies[[topic]], function(m) d[[m]] %in% 1L)
  set(d, j = topic, value = as.numeric(Reduce(`|`, dummy_hits)))
}

# Matches 'eu', 'ef', 'eøs' or 'eec' as a standalone term: it has to be
# preceded by the start of the string, whitespace or a non-word character, and
# followed by the end of the string, whitespace or a non-word character.
pattern <- "(^|\\s|[^\\w])(eu|ef|eøs|eec)($|\\s|[^\\w])"

# EU is 1 when either answer column contains such a term and 0 otherwise.
# grepl() returns FALSE for missing answers, so they never flag EU.
eu_in_first <- grepl(pattern, d$important1, perl = TRUE)
eu_in_second <- grepl(pattern, d$important2, perl = TRUE)
set(d, j = "EU", value = as.numeric(eu_in_first | eu_in_second))

# Policy-area indicator columns that enter the row sums below
area_cols <- c(
  "EU", "abortion", "transport", "taxation",
  "economy", "education", "children", "elder care", "family", "health care",
  "environment", "immigration", "welfare", "employment", "business",
  "agriculture", "redistribution", "pension", "regional policy",
  "Norway", "defense"
)

## policy.area.sum is the number of policy areas a respondent mentions;
## other is 1 for respondents who mention none of them
d[, c("other", "policy.area.sum") := {
  n_areas <- rowSums(.SD)
  list(as.numeric(n_areas == 0), n_areas)
}, .SDcols = area_cols]

## A respondent who mentions at least one policy area is not a non-issue
## respondent
d[policy.area.sum > 0, non.issue := 0]

## Non-issue respondents are not counted under "other"
d[non.issue == 1, other := 0]


####################################################################

###### Classification of Policy Areas Based on Voter Priorities in 
###### Norwegian National Election Survey

###################################################################

# Table specification. Every entry pairs a topic (a column of d) with the
# Norwegian keywords shown for it (`no`) and their English translations (`en`).
# "other" and "non.issue" have no keywords and get an empty string. Keeping the
# three pieces side by side guarantees that the vectors derived below line up.
#
# NOTE(review): the EU regular expression used earlier in the script also
# accepts "ef", which is not listed here. Confirm the omission is intended.
topic_spec <- list(
  "EU" = list(
    no = c("eu", "eec", "eøs"),
    en = c("eu", "eec", "eea")
  ),
  "abortion" = list(
    no = c("abort"),
    en = c("abortion")
  ),
  "transport" = list(
    no = c("samferdsel", "kollektiv", "jernbane", "vei", "veg"),
    en = c("transport", "public transport", "railway", "road")
  ),
  "taxation" = list(
    no = c("skatt", "avgift", "bompenger"),
    en = c("tax", "fee", "toll")
  ),
  "economy" = list(
    no = c("økonomi"),
    en = c("economy")
  ),
  "education" = list(
    no = c("utdanning", "skole", "skule", "student"),
    en = c("education", "school", "student")
  ),
  "children" = list(
    no = c("barn"),
    en = c("children")
  ),
  "elder care" = list(
    no = c("eldre"),
    en = c("elderly")
  ),
  "family" = list(
    no = c("familie"),
    en = c("family")
  ),
  "health care" = list(
    no = c("helse", "sykehus", "sjukehus"),
    en = c("health", "hospital")
  ),
  "environment" = list(
    no = c("miljø"),
    en = c("environment")
  ),
  "immigration" = list(
    no = c("innvandring", "innvandrere", "asyl"),
    en = c("immigration", "immigrants", "asylum")
  ),
  "welfare" = list(
    no = c("velferd", "trygd"),
    en = c("welfare", "social security")
  ),
  "employment" = list(
    no = c("arbeid", "sysselsetting"),
    en = c("work", "employment")
  ),
  "business" = list(
    no = c("bedrift", "handel", "næring"),
    en = c("company", "trade", "business")
  ),
  "agriculture" = list(
    no = c("landbruk", "jordbruk", "fiskeri"),
    en = c("agriculture", "farming", "fishery")
  ),
  "redistribution" = list(
    no = c("utjevning", "fordeling"),
    en = c("equalization", "distribution")
  ),
  "pension" = list(
    no = c("pensjon"),
    en = c("pension")
  ),
  "regional policy" = list(
    no = c("desentralisering", "distrikt"),
    en = c("decentralization", "district")
  ),
  "Norway" = list(
    no = c("norsk", "Norge"),
    en = c("Norwegian", "Norway")
  ),
  "defense" = list(
    no = c("militær", "forsvar"),
    en = c("military", "defense")
  ),
  "other" = list(no = "", en = ""),
  "non.issue" = list(no = "", en = "")
)

# Topic names in table-specification order
policy_topics <- names(topic_spec)

# Unnamed lists of keyword vectors, one element per topic
key_words_norwegian <- unname(lapply(topic_spec, function(s) s[["no"]]))
key_words_english <- unname(lapply(topic_spec, function(s) s[["en"]]))

# Share of respondents (in percent, rounded to whole numbers) with a 1 in each
# topic column of d
fraction_of_respondents <- vapply(
  policy_topics,
  function(topic) round(mean(d[[topic]]) * 100, 0),
  numeric(1)
)

# One row per topic: name, Norwegian keywords, English keywords, share
t_policy_areas <- data.table(
  policy_topics,
  key_words_norwegian,
  key_words_english,
  fraction_of_respondents
)

# Collapse each keyword vector into a single comma-separated string
t_policy_areas[, key_words_norwegian := vapply(
  key_words_norwegian, paste, character(1), collapse = ", "
)]
t_policy_areas[, key_words_english := vapply(
  key_words_english, paste, character(1), collapse = ", "
)]

## Spelling variants of the same word are shown as "a/b" instead of "a, b"
merged_spellings <- c(
  "samferdsel, kollektiv, jernbane, vei, veg" =
    "samferdsel, kollektiv, jernbane, vei/veg",
  "utdanning, skole, skule, student" = "utdanning, skole/skule, student",
  "helse, sykehus, sjukehus" = "helse, sykehus/sjukehus"
)
for (listed in names(merged_spellings)) {
  t_policy_areas[key_words_norwegian == listed,
                 key_words_norwegian := merged_spellings[[listed]]]
}

## Display labels for the two residual categories
display_labels <- c("non.issue" = "no issue", "other" = "other issues")
for (old_label in names(display_labels)) {
  t_policy_areas[policy_topics == old_label,
                 policy_topics := display_labels[[old_label]]]
}

## Sort the policy areas alphabetically and put the two residual categories
## last
named_areas <- setdiff(
  t_policy_areas$policy_topics,
  c("other issues", "no issue")
)
policy_order <- c(sort(named_areas), "other issues", "no issue")
t_policy_areas <- t_policy_areas[order(match(policy_topics, policy_order))]

## Save table

# Convert to an xtable object; digits = 0 prints the percentages without
# decimals. Only arguments that xtable() actually defines are passed here.
z <- xtable(t_policy_areas, digits = 0)

# Write only the table body to tabC1.tex: no column names, no row names, no
# horizontal rules (hline.after = NULL) and no xtable comment line. The file is
# meant to be included inside a table environment defined elsewhere.
print(z,
      only.contents = TRUE, comment = FALSE, hline.after = NULL,
      include.colnames = FALSE, include.rownames = FALSE,
      file = file.path(tab.dir, "tabC1.tex"))





